function [khat]=gapstatkmeans(Aproj_final,PCs,type)
%GAPSTATKMEANS Estimate the number of clusters in PCA scores (gap statistic).
%
%INPUTS:
%Aproj_final is the matrix of scores from the mzPCA_processed command.
%Rows are principal components and columns are observations: rows beyond
%PCs are discarded and the matrix is then transposed before clustering.
%
%PCs is a number representing the proper number of principal components to
%retain.
%
%type should be either 'uniform' or 'pc' for the two different types of
%gap statistic calculations:
%   'uniform' - reference data drawn uniformly over the range of each
%               retained score.
%   'pc'      - reference data drawn uniformly over the range of the data
%               expressed in its own principal axes, then rotated back.
%See the discussion in Martinez, W. L., Martinez, A. R. and Solka, J. L.
%Exploratory data analysis with MATLAB. 2nd Ed., CRC Press, 2011.
%
%OUTPUTS:
%khat is the estimation of the number of different clusters that exist,
%based on the analysis of PCA scores.  It is the smallest k for which
%gap(k) >= gap(k+1) - s(k+1).  If no k satisfies this, a warning is issued
%and the k with the largest gap is returned.
%
%SIDE EFFECTS:
%Prints one progress line per reference data set and opens a figure
%showing the expected and observed log-dispersion curves.

maxClusters=10;     % largest number of clusters examined
B=10;               % number of reference (bootstrap) data sets
nReplicates=5;      % k-means restarts per clustering

% Validate the method up front; otherwise the failure would only surface
% later as an undefined-variable error in the plotting code.
usePC=isequal(strcmp(type,'pc'),true);
useUniform=isequal(strcmp(type,'uniform'),true);
if ~(usePC || useUniform)
    error('gapstatkmeans:badType', ...
        'type must be either ''uniform'' or ''pc''.');
end

% Keep the first PCs components and put observations in rows.
Aproj_final(PCs+1:end,:)=[];
X=Aproj_final';
[n,p]=size(X);

% k-means cannot form more clusters than there are observations, and k = n
% would give zero dispersion (log(0) = -Inf), so cap the range at n-1.
K=min(maxClusters,n-1);
if K < 2
    error('gapstatkmeans:tooFewObservations', ...
        'At least 3 observations are required, but only %d were supplied.',n);
end

% Observed pooled within-cluster dispersion for k = 1..K.
W=zeros(1,K);
W(1)=gapstat_pooled_dispersion(X,ones(n,1),1);
for k=2:K
    labels=kmeans(X,k,'replicates',nReplicates);
    W(k)=gapstat_pooled_dispersion(X,labels,k);
end

% Coordinates in which the uniform reference box is defined.
if usePC
    % Centre before the SVD so V holds the principal axes of the data.
    % svd(.,0) avoids building the n-by-n left factor while still
    % returning a full p-by-p V.
    Xc=X-repmat(mean(X,1),n,1);
    [U,S,V]=svd(Xc,0); %#ok<ASGLU>
    Z=Xc*V;
else
    Z=X;
end
minZ=min(Z,[],1);
maxZ=max(Z,[],1);

% Dispersion of B reference data sets.
Wb=zeros(B,K);
Zb=zeros(n,p);
for b=1:B
    for j=1:p
        Zb(:,j)=unifrnd(minZ(j),maxZ(j),n,1);
    end
    if usePC
        % Rotate back to the original axes.  The mean is not added back
        % because pairwise distances do not depend on translation.
        Xb=Zb*V';
    else
        Xb=Zb;
    end
    Wb(b,1)=gapstat_pooled_dispersion(Xb,ones(n,1),1);
    for k=2:K
        labels=kmeans(Xb,k,'replicates',nReplicates);
        Wb(b,k)=gapstat_pooled_dispersion(Xb,labels,k);
    end
    disp(sprintf('Iteration out of %d Finished:  %d',B,b)); %#ok<DSPS>
end

% Gap statistic and its simulation error.
logWb=log(Wb);
Wobs=log(W);
muWb=mean(logWb,1);
sdk=std(logWb,1,1);          % 1/B normalisation, taken down the columns
gap=muWb-Wobs;
sk=sdk*sqrt(1+1/B);
gapsk=gap-sk;

% Smallest k with gap(k) >= gap(k+1) - s(k+1).
ineq=gap(1:K-1)-gapsk(2:K);
ind=find(ineq >= 0);
if isempty(ind)
    warning('gapstatkmeans:noClearEstimate', ...
        'No k <= %d satisfied the gap criterion; returning the k with the largest gap.',K-1);
    [gapMax,khat]=max(gap); %#ok<ASGLU>
else
    khat=ind(1);
end

%The following generates the Gap Statistic plots
figure;plot(muWb,'b-o','LineWidth',2,'MarkerSize',10)
hold on
plot(Wobs,'r-o','LineWidth',2,'MarkerSize',10)
set(gcf, 'color', 'white');
legend('Expected','Observed')
set(gca,'FontSize',16)
xlabel('Cluster Number','FontSize',16)
ylabel('Dispersion','FontSize',16)
set(gca,'YTick',[])


function Wk=gapstat_pooled_dispersion(X,labels,k)
%GAPSTAT_POOLED_DISPERSION Pooled within-cluster dispersion for one clustering.
%   X is n-by-p (observations in rows), labels assigns each row to a
%   cluster 1..k.  Returns the sum over clusters of the squared pairwise
%   Euclidean distances divided by twice the cluster size.  The total is
%   accumulated from zero on every call, so nothing carries over from a
%   previous clustering with a different k.
Wk=0;
for r=1:k
    members=X(labels==r,:);
    nr=size(members,1);
    % Clusters with fewer than two members have no pairwise distances and
    % contribute nothing (this also avoids 0/0 for an empty cluster).
    if nr > 1
        Wk=Wk+sum(pdist(members).^2)/(2*nr);
    end
end